from datasets import load_dataset

# Export a line-aligned parallel corpus from the "opus_books" en-fr dataset:
# French sentences go to prompts.txt and the matching English sentences go to
# groundtruthfile.txt, one example per line, same line number in both files.
# Only the slice selected by 'train[:5000]' is loaded.
books = load_dataset("opus_books", "en-fr", split='train[:5000]')

# Both files are opened in a single `with` so they are always closed (and
# flushed) together, even if iterating the dataset or a write raises.
# The encoding is pinned to UTF-8 because the text contains accented
# characters; relying on the platform default can fail with
# UnicodeEncodeError or produce machine-dependent output files.
with open("prompts.txt", "w", encoding="utf-8") as promptfile, \
        open("groundtruthfile.txt", "w", encoding="utf-8") as groundtruthfile:
    for book in books:
        translation = book['translation']
        # Escape embedded newlines as the two characters "\n" so that each
        # example occupies exactly one physical line in the output files.
        en = translation['en'].replace("\n", "\\n")
        fr = translation['fr'].replace("\n", "\\n")

        # Keep a pair only when BOTH sides have more than 30
        # whitespace-separated tokens; shorter pairs are skipped.
        if len(en.split()) > 30 and len(fr.split()) > 30:
            # Write both sides in the same iteration to keep the files aligned.
            promptfile.write(fr + "\n")
            groundtruthfile.write(en + "\n")